###############################################################################################################################################################################
#1. INTRODUCTION
#title: Analysis code (calculation of data) for Supran, Rahmstorf, and Oreskes (2022) 'Assessing ExxonMobil’s global warming projections'
#author: Geoffrey Supran
#date: 3 June 2022
#output(s): .rda in dir = 'Data/'; .rda and .csv in 'Results/Skill Score/'; .csv in 'Results/Temp Change/'; .csv in 'Results/iTCR/'; and .csv in 'Results/Detectable Warming/'
#description: This is the code used to calculate the following, as reported in Supran, Rahmstorf, and Oreskes (2022): 
# - Temperature change per decade [(section 3) projection data; (section 5) historical data]
# - Implied transient climate response (iTCR) [(section 4) projection data; (section 6) historical data]
# - Modeling 'skill scores' for temperature change [section 7; combined with skill scores for iTCR in section 9, yielding table 1].
# - Modeling 'skill scores' for iTCR [section 8; combined with skill scores for temperature change in section 9, yielding table 1].
# - Difference (projected minus observed) in temperature change per decade [section 10]
# - Difference (projected minus observed) in iTCR [section 11]
# - Temperature change corresponding to an increase in atmospheric CO2 concentration from 450ppm to 600ppm, as indicated by Glaser (1982, fig.3) and Shaw (1984) [section 12].
# - Bootstrapped standard error of the median year by which Exxon predicted AGW would be detectable [section 13]
# - Ratio of each rate of external forcing increase reported by ExxonMobil versus the corresponding mean estimate of observational forcings over the projection period [section 14].

#These calculations are performed on the pre-processed data generated in Preprocess.R and saved to dir = 'Data/'. The calculations are performed using the following script run in R, then output to dir = 'Data/' and dir = 'Results/'.
###############################################################################################################################################################################
#2. SETUP
library(readxl)
library(ggplot2)
library(egg)
library(tidyr)
library(forecast)
library(ggrepel)
library(boot)
setwd("###/")
###############################################################################################################################################################################
#3. MAIN CODE: Calculate temperature change per decade (TC) for PROJECTION data
dir = 'Data/'
fname = 'projections.rda'
load(file = paste(dir,fname,sep=''))

endyear = 2019

TC_all_proj = data.frame()
cats = unique(proj$cat)
CO2scens = unique(proj$CO2scen)
for(i in 1:length(cats)){
  for(j in 1:length(CO2scens)){
    temp = proj[proj$cat == toString(cats[i]),]
    tempb = temp[temp$CO2scen == toString(CO2scens[j]),]
    
    if(nrow(tempb) <= 1){
      next
    }
    if(nrow(tempb) == 2){ #For the occasional case where we have only 2 data points (notably 1997 Kheshgi), simply calculate TC from the straight line joining those two points. Set error bars equal to zero.
      TC = 10*(tempb$dT[2] - tempb$dT[1])/(tempb$year[2] - tempb$year[1])
      TC_high = TC
      TC_low = TC
    } else{
      temp2 = tempb[tempb$year>=startyears[i,2] & tempb$year<=endyear,]
      
      df = list()
      #If temp data has no sd error bars, then simply fit to the original data points.
      if(sum(is.na(temp2$sd_high)) == nrow(temp2) & sum(is.na(temp2$sd_low)) == nrow(temp2)){
        df[[1]] = temp2$dT
      } else{
        df[[1]] = temp2$dT
        df[[2]] = temp2$dT + temp2$sd_high
        df[[3]] = temp2$dT - temp2$sd_low
      }
      
      for(x in 1:length(df)){
        
        #Run OLS model
        if(length(df) == 1){
          TCtemp = lm(dT ~ year, data = temp2)
        }
        if(length(df) == 3){
          if(x == 1){
            TCtemp = lm(dT ~ year, data = temp2)
          }
          if(x == 2){
            TCtemp = lm(dT+sd_high ~ year, data = temp2)
          }
          if(x == 3){
            TCtemp = lm(dT-sd_low ~ year, data = temp2)
          }
        }
        TC_val = TCtemp$coefficients[2]  
        
        #Run AR(1) model
        if(cats[i] == '1980 Shaw; 1982 Glaser (Figure9)'){ #For '1980 Shaw; 1982 Glaser (Figure9)' only, AR(1) model generates error due to a coincidence of perfectly correlated data. Therefore force confidence intervals to be based on OLS, not AR(1)
          ci_upper = confint(TCtemp, 'year', level=0.95)[2]
          ci_lower = confint(TCtemp, 'year', level=0.95)[1]
        } else{var_AR1 = arima(df[[x]], #else A
                               order = c(1,0,0),
                               xreg = temp2$year)
        
        #Compute confidence intervals for each regression method
        ols_ci = confint(TCtemp, 'year', level=0.95)[2] - TC_val #Compute confidence interval for OLS regression.
        arima_ci = confint(var_AR1, level = 0.95)[3,2] - var_AR1$coef[3] #Compute confidence interval for AR(1) regression.
        
        if(arima_ci > ols_ci){
          ci_upper = confint(var_AR1, level = 0.95)[3,2] - var_AR1$coef[3] + TC_val
          ci_lower = confint(var_AR1, level = 0.95)[3,1] - var_AR1$coef[3] + TC_val
        } else{
          ci_upper = confint(TCtemp, 'year', level=0.95)[2]
          ci_lower = confint(TCtemp, 'year', level=0.95)[1]
        }
        } #close elseA
        ci_val = ci_upper - TC_val
        
        #Compute total uncertainty
        if(length(df) == 1){
          TC = 10*TC_val
          uncertainty = ci_val
          TC_high = 10*(TC_val + uncertainty)
          TC_low = 10*(TC_val - uncertainty)
        }
        
        if(length(df) == 3){
          if(x == 1){
            TC = 10*TC_val
          }
          if(x == 2){
            TC_high = 10*(TC_val + ci_val)
          }
          if(x ==3){
            TC_low = 10*(TC_val - ci_val)
          }
        }
      }#close x for loop
    }#close else loop
    
    temp = data.frame(cbind(cat = cats[i],CO2scen = CO2scens[j],TC, TC_high, TC_low))
    TC_all_proj = rbind(TC_all_proj, temp)
    
  }#close j loop
}#close i loop
dir = 'Data/'
fname = 'TC_all_proj.rda'
save(TC_all_proj, file = paste(dir,fname,sep=''))
###############################################################################################################################################################################
#4. MAIN CODE: Calculate implied transient climate response (iTCR) for PROJECTION data
dir = 'Data/'
fname = 'projections.rda'
load(file = paste(dir,fname,sep=''))

endyear = 2019

TCR_all_proj = data.frame()
cats = unique(proj$cat)
CO2scens = unique(proj$CO2scen)
for(i in 1:length(cats)){
  for(j in 1:length(CO2scens)){
    
    temp = proj[proj$cat == toString(cats[i]),]
    tempb = temp[temp$CO2scen == toString(CO2scens[j]),]
    
    if(nrow(tempb) == 0){
      next
    }else{ #else1
      if(cats[i] == '2001 Albritton'){ #'2001 Albritton' dataset comes with forcings already provided (instead of CO2)
        tempb$F = tempb$CO2
        tempb = tempb[,-5]
      } 
      else{ #else2
        tempb$F = 5.35*log(tempb$CO2 / tempb$CO2[1])
        
        if(cats[i] == '1977 Black; 1979 Mastracchio' | cats[i] == '1980 Shaw; 1982 Glaser (Figure9)'){ #For documents i=1 and i=2, CO2 values are available only for certain years. Therefore only compute F values starting from the first year for which we have CO2 data. 
          ind = which(!is.na(tempb$CO2))[1] #Find first non-NA value of CO2 data
          tempb$F = 5.35*log(tempb$CO2 / tempb$CO2[ind])
          
          if(cats[i] == '1980 Shaw; 1982 Glaser (Figure9)'){ #For document i=2, we need to linearly scale computed F values (computed from explicitly stated CO2 data) by temperature in order to obtain F values within the time period of interest (startyear through endyear).
            ind2 = which(!is.na(tempb$CO2))[2] #Find second non-NA value of CO2 data
            ind3 = which(tempb$year==endyear)
            for(z in seq(ind+1,ind3,1)){
              tempb$F[z] = (tempb$dT[z] / tempb$dT[ind2]) * tempb$F[ind2] #Linearly scale F by temperatures in order to get F for endyear (2019)
            }
          }
          
        }
      }#close else2
      
      temp2 = tempb[tempb$year>=startyears[i,2] & tempb$year<=endyear,]
      if(nrow(tempb) == 2){ #For the occasional case where we have only 2 temperature (as well as forcing) data points (notably 1997 Kheshgi), simply calculate TCR from the straight line joining those two points. 
        TCR = 3.7*(tempb$dT[2] - tempb$dT[1])/(tempb$F[2] - tempb$F[1])
        TCR_high = TCR
        TCR_low = TCR
      } else{ #else3
        df = list()
        #If temp data has no sd error bars, then simply fit to the original data points.
        if(sum(is.na(temp2$sd_high)) == nrow(temp2) & sum(is.na(temp2$sd_low)) == nrow(temp2)){
          df[[1]] = temp2$dT
        } else{
          df[[1]] = temp2$dT
          df[[2]] = temp2$dT + temp2$sd_high
          df[[3]] = temp2$dT - temp2$sd_low
        }
        
        sd_temp = data.frame(matrix(NA, nrow = 3, ncol = 3))
        colnames(sd_temp) = c('x','tcr','varTREND')
        for(x in 1:length(df)){
          
          #Run OLS model
          if(length(df) == 1){
            TCRtemp = lm(dT ~ F, data = temp2)
          }
          if(length(df) == 3){
            if(x == 1){
              TCRtemp = lm(dT ~ F, data = temp2)
            }
            if(x == 2){
              TCRtemp = lm(dT+sd_high ~ F, data = temp2)
            }
            if(x == 3){
              TCRtemp = lm(dT-sd_low ~ F, data = temp2)
            }
          }
          TCR_val = TCRtemp$coefficients[2]
          
          ci_upper = confint(TCRtemp, 'F', level=0.95)[2] #Compute confidence interval for OLS regression
          ci_val = ci_upper - TCR_val
          
          if(length(df) == 1){
            TCR = 3.7*TCR_val
            uncertainty = sqrt(ci_val^2)
            TCR_high = 3.7*(TCR_val + uncertainty)
            TCR_low = 3.7*(TCR_val - uncertainty)
          }
          
          if(length(df) == 3){
            sd_temp$x[x] = x
            sd_temp$tcr[x] = TCR_val
            sd_temp$varTREND[x] = ci_val
            
            if(x == 1){
              TCR = 3.7*TCR_val
            }
            if(x ==2){
              TCR_high = 3.7*(TCR_val + ci_val)
            }
            if(x == 3){
              TCR_low = 3.7*(TCR_val - ci_val)
            }
          }
          
        }#close x for loop
      } #close else3
      temp = data.frame(cbind(cat = cats[i],CO2scen = CO2scens[j],TCR, TCR_high, TCR_low))
      TCR_all_proj = rbind(TCR_all_proj, temp)
    }#close else1
    
  }#close j loop
}#close i loop
dir = 'Data/'
fname = 'TCR_all_proj.rda'
save(TCR_all_proj, file = paste(dir,fname,sep=''))
###############################################################################################################################################################################
#5. MAIN CODE: Calculate temperature change per decade (TC) for HISTORICAL data
dir = 'Data/'
fname = 'projections.rda'
load(file = paste(dir,fname,sep=''))

dir = 'Data/'
fname = 'historical.rda'
load(file = paste(dir,fname,sep=''))

endyear = 2019

SD_TC = data.frame()
TC_all_hist = data.frame()
cats_proj = unique(startyears$cat)
cats_hist = unique(hist$cat)
CO2scens = unique(proj$CO2scen)
for(i in 1:length(cats_proj)){
  for(j in 1:length(CO2scens)){
    sd_temp = data.frame(cat = character(), CO2scen = character(), tc = double(), varAR1 = double())
    for(k in 1:length(cats_hist)){
      temp = hist[hist$cat == toString(cats_hist[k]),]
      temp2 = temp[temp$year>=startyears[i,2] & temp$year<=endyear,]
      temp2$year = as.numeric(temp2$year)
      temp2$dT = as.numeric(temp2$dT)
      
      TCtemp = lm(dT ~ year, data = temp2)
      sd_temp[k,1] = toString(cats_hist[k])
      sd_temp[k,2] = toString(CO2scens[j])
      sd_temp[k,3] = TCtemp$coefficients[2]
      
      #Run AR(1) model
      var_AR1 = arima(temp2$dT, 
                      order = c(1,0,0),
                      xreg = temp2$year)
      
      #Compute confidence intervals for each regression method
      ols_ci = confint(TCtemp, 'year', level=0.95)[2] - TCtemp$coefficients[2]
      arima_ci = confint(var_AR1, level = 0.95)[3,2] - var_AR1$coef[3]
      
      if(arima_ci > ols_ci){
        ci_upper = confint(var_AR1, level = 0.95)[3,2] - var_AR1$coef[3] + TCtemp$coefficients[2]
        ci_lower = confint(var_AR1, level = 0.95)[3,1] - var_AR1$coef[3] + TCtemp$coefficients[2]
      }
      else{
        ci_upper = confint(TCtemp, 'year', level=0.95)[2]
        ci_lower = confint(TCtemp, 'year', level=0.95)[1]
      }
      ci_val = ci_upper - TCtemp$coefficients[2]
      sd_temp[k,4] = ci_val
      
      #Save out SD_TC, which summarises TC results (tc) and corresponding standard error of each tc estimate (sd) for each individual combination of projection (cats_proj[i]), projection CO2 scenario (CO2scens[j]), and historical dataset (cats_hist[k]). We will use these combinations to compute Skill Scores (SS) later.
      SD_TC_temp = data.frame(cbind(cat_proj = cats_proj[i], CO2scen = CO2scens[j], cat_hist = cats_hist[k], tc = 10*TCtemp$coefficients[2], sd = 10*summary(TCtemp)[[4]][2,2]))
      SD_TC = rbind(SD_TC,SD_TC_temp)
    }#close k loop
    
    TC = 10*mean(sd_temp$tc)
    TC_sd = sd(sd_temp$tc)
    var_AR1_avg = mean(sd_temp$varAR1)
    TC_high = 10*(mean(sd_temp$tc) + sqrt(4*TC_sd^2 + var_AR1_avg^2))
    TC_low = 10*(mean(sd_temp$tc) - sqrt(4*TC_sd^2 + var_AR1_avg^2))
    
    temp = data.frame(cbind(cat = cats_proj[i],CO2scen = CO2scens[j],TC, TC_high, TC_low))
    TC_all_hist = rbind(TC_all_hist, temp)
  }#close j loop
}#close i loop
dir = 'Data/'
fname = 'TC_all_hist.rda'
save(TC_all_hist, SD_TC, file = paste(dir,fname,sep=''))
###############################################################################################################################################################################
#6. MAIN CODE: Calculate implied transient climate response (iTCR) for HISTORICAL data
dir = 'Data/'
fname = 'projections.rda'
load(file = paste(dir,fname,sep=''))

dir = 'Data/'
fname = 'historical.rda'
load(file = paste(dir,fname,sep=''))

dir = 'Data/'
fname = 'TCR_all_proj.rda'
load(file = paste(dir,fname,sep=''))

#Load Dessler and Porter (2017) radiative forcing data, and extrapolate for 2018-19 using ARIMA
dir = 'Raw data/'
df = read_excel(paste(dir,'rf_historical.xlsx',sep=''), sheet = "rf_anthro")
colnames(df) = c('year',seq(1:(ncol(df)-1)))
df = gather(df, rf_member, rf, -year)
df$rf_member = as.numeric(df$rf_member)

rf_new = data.frame()
rfs = unique(df$rf_member)
for(i in 1:length(rfs)){
  df2 = data.frame(df[df$rf_member==i,])
  
  model = auto.arima(df2$rf) #Build time series forecast model using ARIMA
  forecast = predict(model,2)
  
  temp = data.frame(year = as.numeric(c(2018, 2019)), rf_member = as.numeric(c(i, i)), rf = as.numeric(forecast$pred)) #Combine original data with forecasts for 2018-19
  df2 = rbind(df2, temp)
  rf_new = rbind(rf_new, df2)
}
dir = 'Data/'
fname = 'rf_new.rda'
save(rf_new, file = paste(dir,fname,sep=''))

endyear = 2019

SD_TCR = data.frame()
TCR_all_hist = data.frame()
cats_proj = unique(startyears$cat)
cats_hist = unique(hist$cat)
CO2scens = unique(proj$CO2scen)
RFs = unique(rf_new$rf_member)
for(i in 1:length(cats_proj)){ print(paste('i =',i,'out of',length(cats_proj),sep = ' '))  #Do below calculations for each temp projection dataset
  for(j in 1:length(CO2scens)){ #Do below calculations for each CO2 scenario of each temp projection dataset
    
    #To save computation time, only run k for loop if the given projection data set (cats_proj[i]) contains a CO2 scenario CO2scens[j]
    df_temp = TCR_all_proj[TCR_all_proj$cat == cats_proj[i],]
    if(!(CO2scens[j] %in% df_temp$CO2scen)){
      next #next1
    } else{
      
      tcr_temp = data.frame()
      for(k in 1:length(cats_hist)){  print(paste('k =',k,'out of',length(cats_hist),sep = ' ')) #Isolate the subset of each historical temperature dataset (cats_hist[k]).
        temp = hist[hist$cat == toString(cats_hist[k]),]
        temp2 = temp[temp$year>=startyears[i,2] & temp$year<=endyear,]
        temp2$year = as.numeric(temp2$year)
        temp2$dT = as.numeric(temp2$dT)
        
        for(x in 1:length(RFs)){ #For each of the 1000 radiative forcing datasets, match rfs onto corresponding years of historical temp dataset.
          df_temp1 = rf_new[rf_new$rf_member==x,]
          ind = match(temp2$year, df_temp1$year)
          temp3 = cbind(temp2,df_temp1[ind,3])
          colnames(temp3)[ncol(temp3)] = 'rf'
          TCRtemp = lm(dT ~ rf, data = temp3) #OLS correlation of historical temp vs historical rf (for rf dataset x)
          tcr_temp2 = data.frame(cat = toString(cats_hist[k]), CO2scen = toString(CO2scens[j]), rf_member = RFs[x], TCR_val = TCRtemp$coefficients[2], ci_upper = confint(TCRtemp, 'rf', level=0.95)[2]) #Compute TCR and its corresponding 95% upper confidence interval (ci_upper) for given pair of historical temp and historical rf
          tcr_temp = rbind(tcr_temp,tcr_temp2)
          
          #Save out SD_TCR, which summarises TCR results (tcr) and corresponding standard error of each tcr estimate (sd) for each individual combination of projection (cats_proj[i]), projection CO2 scenario (CO2scens[j]),  historical temp dataset (cats_hist[k]), and historical rf dataset (RFs[x]). We will use these combinations to compute Skill scores (SS) later.
          SD_TCR_temp = data.frame(cbind(cat_proj = cats_proj[i], CO2scen = CO2scens[j], cat_hist = cats_hist[k], rf = RFs[x], tcr = 3.7*TCRtemp$coefficients[2], sd = 3.7*summary(TCRtemp)[[4]][2,2]))
          
          if(i==1 & j==1 & k==1 & x==1){
            SD_TCR = SD_TCR_temp
          } else{
            SD_TCR = rbind(SD_TCR,SD_TCR_temp)
          }
          
        }#close x for loop
      }#close k for loop
    }#close next1
    
    tcr_val_avg = mean(tcr_temp$TCR_val)
    TCR_sd = sd(tcr_temp$TCR_val)
    
    tcr_temp$var_OLS = tcr_temp$ci_upper - tcr_temp$TCR_val
    var_OLS_avg = mean(tcr_temp$var_OLS)
    
    #Compute total uncertainty
    uncertainty = sqrt(4*TCR_sd^2 + var_OLS_avg^2) #Total uncertainty (at 2sigma i.e. 95% level) = square root of: (i) (error due to coefficient uncertainty)^2 = (2*s.d. of coefficient)^2 = 4*(ci_OLS_95 - mean)^2 = 4*variance; PLUS (ii) (error due to trend uncertainty)^2 = (2*confidence interval due to autocorrelation)^2 = 4*(ci_AR_95 - mean)^2
    
    TCR = 3.7*tcr_val_avg
    TCR_high = 3.7*(tcr_val_avg + uncertainty)
    TCR_low = 3.7*(tcr_val_avg - uncertainty)
    
    TCR_all_hist_temp = data.frame(cbind(cat = cats_proj[i],CO2scen = CO2scens[j],TCR, TCR_high, TCR_low))
    TCR_all_hist = rbind(TCR_all_hist, TCR_all_hist_temp)
  }#close j loop
}#close i loop
dir = 'Data/'
fname = 'TCR_all_hist.rda'
save(TCR_all_hist, SD_TCR, file = paste(dir,fname,sep=''))
###############################################################################################################################################################################
#7. MAIN CODE: Calculate 'skill scores' for temperature change
dir = 'Data/'
fname = 'TC_all_proj.rda'
load(file = paste(dir,fname,sep=''))

dir = 'Data/'
fname = 'projections.rda'
load(file = paste(dir,fname,sep=''))

dir = 'Data/'
fname = 'TC_all_hist.rda'
load(file = paste(dir,fname,sep=''))

colnames(SD_TC)[1] = 'cat'
SS1 = merge(TC_all_proj, SD_TC, by = c("cat","CO2scen"), all.x = TRUE)
SS1 = merge(SS1, startyears, by = "cat")

SS1 = SS1[,c(1,2,9,6,3,7,8)]
colnames(SS1) = c('cat_proj','CO2scen','startyear','cat_hist','TC_proj','TC_hist','sd_TC_hist')
SS1$TC_hist = as.numeric(SS1$TC_hist)
SS1$TC_proj = as.numeric(SS1$TC_proj)
SS1$sd_TC_hist = as.numeric(SS1$sd_TC_hist)
SS2 = SS1 #Create SS2, which will be used to estimate uncertainties of skill scores below.

#Compute skill scores
SS1$E_f = (SS1$TC_hist - SS1$TC_proj)^2
SS1$E_ref = (SS1$TC_hist)^2
SS1$SS = 1 - sqrt(SS1$E_f / SS1$E_ref)

##Compute uncertainties of skill scores
SS3 = SS2[rep(seq_len(nrow(SS2)), each = 100), ] #Replicate each line of SS2 100 times. We will then repopulate its TC_hist cells with 100 random permutations of each central TC_hist value estimated by OLS.
mc.TC_hist = data.frame()
for(i in 1:nrow(SS2)){ #Use this for loop to create the 100 random permutations of TC_hist values.
  set.seed(1)  #Fix random seed for reproducibility in random sampling of Gaussian distribution
  mc.TC_hist_temp = data.frame(rnorm(100,SS2$TC_hist[i],SS2$sd_TC_hist[i])) #Using a Monte Carlo approach, we generate 100 random samples of the Gaussian distribution of OLS regression coefficient TC_hist, based on its standard error, sd_TC_hist.
  mc.TC_hist = rbind(mc.TC_hist, mc.TC_hist_temp)
}
colnames(mc.TC_hist) = 'TC_hist'
SS3 = SS3[,-c(6:7)]
SS3 = cbind(SS3, mc.TC_hist) #Replace 100 duplicate TC_hist values (for each model) with 100 random samples generated above.

#Compute skill scores for all 100 random permutations of TC_hist, and then take 5th and 95th percentiles of those skill scores as uncertainties of skill scores computed above.
SS3$E_f = (SS3$TC_hist - SS3$TC_proj)^2
SS3$E_ref = (SS3$TC_hist)^2
SS3$SS = 1 - sqrt(SS3$E_f / SS3$E_ref)

#Create table of 5th and 95th percentile skill scores for each projection (i.e. model), which we'll then use below to match with median values to produce final table.
mc.SS_TC = data.frame(Model = as.character(), Timeframe = as.character(), Quantile_Skill_dTdt_low = as.numeric(), Quantile_Skill_dTdt_high = as.numeric())
for(i in 1:nrow(TC_all_proj)){
  SS_TC_temp1 = SS3[SS3$cat_proj %in% TC_all_proj$cat[i] & SS3$CO2scen %in% TC_all_proj$CO2scen[i],]
  SS_quantile = quantile(SS_TC_temp1$SS, probs = c(0.05, 0.95)) #Compute 5th and 95th percentile skill scores for each projection (i.e. model) over all 100 perturbations of TC_hist.
  SS_startyear = SS_TC_temp1$startyear[1]
  mc.SS_TC[i,1] = toString(paste(TC_all_proj$cat[i], TC_all_proj$CO2scen[i], sep = ' | '))
  mc.SS_TC[i,2] = SS_startyear
  mc.SS_TC[i,3] = SS_quantile[1]
  mc.SS_TC[i,4] = SS_quantile[2]
}

#Create table of median skill scores
SS_TC = data.frame(Model = as.character(), Timeframe = as.character(), Skill_dTdt = as.numeric())
for(i in 1:nrow(TC_all_proj)){
  SS_TC_temp1 = SS1[SS1$cat_proj %in% TC_all_proj$cat[i] & SS1$CO2scen %in% TC_all_proj$CO2scen[i],]
  SS_median = median(SS_TC_temp1$SS) #Compute median skill score for each projection (i.e. model) over all historical observation data sets.
  SS_startyear = SS_TC_temp1$startyear[1]
  SS_TC[i,1] = toString(paste(TC_all_proj$cat[i], TC_all_proj$CO2scen[i], sep = ' | '))
  SS_TC[i,2] = SS_startyear
  SS_TC[i,3] = SS_median
}

#Match each projection's median data (in SS_TC) with its percentile values (in mc.SS_TC).
ind = match(mc.SS_TC$Model, SS_TC$Model)
SS_TC$quantile_low = mc.SS_TC$Quantile_Skill_dTdt_low[ind]
SS_TC$quantile_high = mc.SS_TC$Quantile_Skill_dTdt_high[ind]

dir = 'Results/Skill Score/'
fname = 'SS_TC.rda'
save(SS_TC, file = paste(dir,fname,sep=''))
###############################################################################################################################################################################
#8. MAIN CODE: Calculate 'skill scores' for iTCR
dir = 'Data/'
fname = 'TCR_all_proj.rda'
load(file = paste(dir,fname,sep=''))

dir = 'Data/'
fname = 'TCR_all_hist.rda'
load(file = paste(dir,fname,sep=''))

dir = 'Data/'
fname = 'projections.rda'
load(file = paste(dir,fname,sep=''))

colnames(SD_TCR)[1] = 'cat'
SS1 = merge(TCR_all_proj, SD_TCR, by = c("cat","CO2scen"), all.x = TRUE)
SS1 = merge(SS1, startyears, by = "cat")

SS1 = SS1[,c(1,2,10,6,3,8,9)]
colnames(SS1) = c('cat_proj','CO2scen','startyear','cat_hist','TCR_proj','TCR_hist','sd_TCR_hist')
SS1$TCR_hist = as.numeric(SS1$TCR_hist)
SS1$TCR_proj = as.numeric(SS1$TCR_proj)
SS1$sd_TCR_hist = as.numeric(SS1$sd_TCR_hist)
SS2 = SS1 #Create SS2, which will be used to estimate uncertainties of skill scores below.

#Compute skill scores
SS1$E_f = (SS1$TCR_hist - SS1$TCR_proj)^2
SS1$E_ref = (SS1$TCR_hist)^2
SS1$SS = 1 - sqrt(SS1$E_f / SS1$E_ref)

##Compute uncertainties of skill scores
SS3 = SS2[rep(seq_len(nrow(SS2)), each = 100), ] #Replicate each line of SS2 100 times. We will then repopulate its TCR_hist cells with 100 random permutations of each central TCR_hist value estimated by OLS.
mc.TCR_hist = data.frame()
for(i in 1:nrow(SS2)){ print(paste('i (first) =',i,'out of',nrow(SS2),sep = ' ')) #Use this for loop to create the 100 random permutations of TCR_hist values.
  set.seed(1)  #Fix random seed for reproducibility in random sampling of Gaussian distribution
  mc.TCR_hist_temp = data.frame(rnorm(100,SS2$TCR_hist[i],SS2$sd_TCR_hist[i])) #Using a Monte Carlo approach, we generate 100 random samples of the Gaussian distribution of OLS regression coefficient TCR_hist, based on its standard error, sd_TCR_hist.
  mc.TCR_hist = rbind(mc.TCR_hist, mc.TCR_hist_temp)
}
colnames(mc.TCR_hist) = 'TCR_hist'
SS3 = SS3[,-c(6:7)]
SS3 = cbind(SS3, mc.TCR_hist) #Replace 100 duplicate TCR_hist values (for each model) with 100 random samples generated above.

#Compute skill scores for all 100 random permutations of TCR_hist, and then take 5th and 95th percentiles of those skill scores as uncertainties of skill scores computed above.
SS3$E_f = (SS3$TCR_hist - SS3$TCR_proj)^2
SS3$E_ref = (SS3$TCR_hist)^2
SS3$SS = 1 - sqrt(SS3$E_f / SS3$E_ref)

#Create table of 5th and 95th percentile skill scores for each projection (i.e. model), which we'll then use below to match with median values to produce final table.
mc.SS_TCR = data.frame(Model = as.character(), Timeframe = as.character(), Quantile_Skill_dTdF_low = as.numeric(), Quantile_Skill_dTdF_high = as.numeric())
for(i in 1:nrow(TCR_all_proj)){ print(paste('i (second) =',i,'out of',nrow(TCR_all_proj),sep = ' '))
  SS_TCR_temp1 = SS3[SS3$cat_proj %in% TCR_all_proj$cat[i] & SS3$CO2scen %in% TCR_all_proj$CO2scen[i],]
  SS_quantile = quantile(SS_TCR_temp1$SS, probs = c(0.05, 0.95)) #Compute 5th and 95th percentile skill scores for each projection (i.e. model) over all 100 perturbations of TC_hist.
  SS_startyear = SS_TCR_temp1$startyear[1]
  mc.SS_TCR[i,1] = toString(paste(TCR_all_proj$cat[i], TCR_all_proj$CO2scen[i], sep = ' | '))
  mc.SS_TCR[i,2] = SS_startyear
  mc.SS_TCR[i,3] = SS_quantile[1]
  mc.SS_TCR[i,4] = SS_quantile[2]
}

#Create table of median skill scores
SS_TCR = data.frame(Model = as.character(), Timeframe = as.character(), Skill_dTdF = as.numeric())
for(i in 1:nrow(TCR_all_proj)){
  SS_TCR_temp1 = SS1[SS1$cat_proj %in% TCR_all_proj$cat[i] & SS1$CO2scen %in% TCR_all_proj$CO2scen[i],]
  SS_median = median(SS_TCR_temp1$SS)
  SS_startyear = SS_TCR_temp1$startyear[1]
  SS_TCR[i,1] = toString(paste(TCR_all_proj$cat[i], TCR_all_proj$CO2scen[i], sep = ' | '))
  SS_TCR[i,2] = SS_startyear
  SS_TCR[i,3] = SS_median
}

#Match each projection's median data (in SS_TCR) with its percentile values (in mc.SS_TCR).
ind = match(mc.SS_TCR$Model, SS_TCR$Model)
SS_TCR$quantile_low = mc.SS_TCR$Quantile_Skill_dTdF_low[ind]
SS_TCR$quantile_high = mc.SS_TCR$Quantile_Skill_dTdF_high[ind]

dir = 'Results/Skill Score/'
fname = 'SS_TCR.rda'
save(SS_TCR, file = paste(dir,fname,sep=''))
###############################################################################################################################################################################
#9. MAIN CODE: Combine 'skill scores' for temperature change and iTCR to produce table S3.
dir = 'Results/Skill Score/'
fname = 'SS_TC.rda'
load(file = paste(dir,fname,sep=''))

dir = 'Results/Skill Score/'
fname = 'SS_TCR.rda'
load(file = paste(dir,fname,sep=''))

#Define functions used for bootstrapping below
f_med = function(data, indices){
  dt = data[indices,]
  c(mean(as.numeric(dt[,3])))
}

f_med2 = function(data, indices){
  dt = data[indices,]
  c(mean(as.numeric(dt[,6])))
}

SS_all = merge(SS_TC, SS_TCR, by = 'Model')
SS_all = SS_all[,-6]
colnames(SS_all) = c('Model','Timeframe', 'Skill_dTdt','Quantile_TC_low','Quantile_TC_high','Skill_dTdF','Quantile_TCR_low','Quantile_TCR_high')
SS_all = SS_all[order(SS_all[,2],SS_all[,1]),] #Order by publication year then by name
SS_all$Timeframe = paste(SS_all$Timeframe, '-2019',sep='')
ind = which(SS_all$Model == '1997 Kheshgi | nominal') #Correct the time frame of 1997 Kheshgi, which, unlike all other models, had results reported only through 2010.
SS_all$Timeframe[ind] = '1997-2010'

#Compute average values and bootstrapped standard error of the mean of each skill score OVER ALL PROJECTIONS
mean_TC = mean(SS_all$`Skill_dTdt`)
mean_TF = mean(SS_all$`Skill_dTdF`)
set.seed(1234)
bootstrap = boot(SS_all, f_med, R=10000)
se_TC = sd(bootstrap$t)
bootstrap = boot(SS_all, f_med2, R=10000)
se_TF = sd(bootstrap$t)

##Compute average values and bootstrapped standard error of the mean of each skill score OVER ALL PROJECTIONS EXCEPT 2 OVERLAPS WITH 18 ACADEMIC/GOVERNMENT PROJECTIONS ANALYSED BY HAUSFATHER ET AL. (2020)
models_Hausfather = c('1982 Weinberg; 1984 Callegari | nominal',
                      '2001 Albritton | nominal')

SS_all_nonHausfather = SS_all[!(SS_all$Model %in% models_Hausfather),]

mean_TC_nonHausfather = mean(SS_all_nonHausfather$`Skill_dTdt`)
mean_TF_nonHausfather = mean(SS_all_nonHausfather$`Skill_dTdF`)
set.seed(1234)
bootstrap = boot(SS_all_nonHausfather, f_med, R=10000)
se_TC_nonHausfather = sd(bootstrap$t)
bootstrap = boot(SS_all_nonHausfather, f_med2, R=10000)
se_TF_nonHausfather = sd(bootstrap$t)

##Compute average values and bootstrapped standard error of the mean of each skill score OVER EXXON'S OWN MODELS ONLY
models_EM = c('1982 Glaser (Figure 3); 1984 Shaw | nominal',
              '1985 Flannery (Page 23) | nominal',
              '1985 Flannery (Page 24) | high',
              '1985 Flannery (Page 24) | low',
              '1985 Flannery (Page 24) | nominal',
              '1985 Hoffert (fig. 5.16) | high',
              '1985 Hoffert (fig. 5.16) | low',
              '1985 Hoffert (fig. 5.16) | nominal',
              '1994 Jain | nominal',
              '1997 Kheshgi | nominal',
              '2003 Kheshgi (fig. 7) | nominal',
              '2003 Kheshgi (fig. 8) | nominal')

SS_all_EM = SS_all[SS_all$Model %in% models_EM,]

mean_TC_EM = mean(SS_all_EM$`Skill_dTdt`)
mean_TF_EM = mean(SS_all_EM$`Skill_dTdF`)
set.seed(1234)
bootstrap = boot(SS_all_EM, f_med, R=10000)
se_TC_EM = sd(bootstrap$t)
bootstrap = boot(SS_all_EM, f_med2, R=10000)
se_TF_EM = sd(bootstrap$t)

#Incorporate quantile values into median skill scores for each model
SS_all$`Skill_dTdt` = paste(round(SS_all$`Skill_dTdt`, 2),' [',round(SS_all$Quantile_TC_low, 2),' to ',round(SS_all$Quantile_TC_high, 2),']', sep = '')
SS_all$`Skill_dTdF` = paste(round(SS_all$`Skill_dTdF`, 2),' [',round(SS_all$Quantile_TCR_low, 2),' to ',round(SS_all$Quantile_TCR_high, 2),']', sep = '')

#Add in average values of each skill score OVER ALL PROJECTIONS
rowtotal = nrow(SS_all)
SS_all[rowtotal+1,1] = 'All projections: AVERAGE'
SS_all[rowtotal+1,3] = paste(round(mean_TC, 2),' [',round((mean_TC-se_TC), 2),' to ',round((mean_TC+se_TC), 2),']', sep = '')
SS_all[rowtotal+1,6] = paste(round(mean_TF, 2),' [',round((mean_TF-se_TF), 2),' to ',round((mean_TF+se_TF), 2),']', sep = '')

#Add in average values of each skill score OVER ALL PROJECTIONS EXCEPT 2 OVERLAPS WITH 18 ACADEMIC/GOVERNMENT PROJECTIONS ANALYSED BY HAUSFATHER ET AL. (2020)
rowtotal = nrow(SS_all)
SS_all[rowtotal+1,1] = 'All projections bar 2 overlaps with Hausfather et al.: AVERAGE'
SS_all[rowtotal+1,3] = paste(round(mean_TC_nonHausfather, 2),' [',round((mean_TC_nonHausfather-se_TC_nonHausfather), 2),' to ',round((mean_TC_nonHausfather+se_TC_nonHausfather), 2),']', sep = '')
SS_all[rowtotal+1,6] = paste(round(mean_TF_nonHausfather, 2),' [',round((mean_TF_nonHausfather-se_TF_nonHausfather), 2),' to ',round((mean_TF_nonHausfather+se_TF_nonHausfather), 2),']', sep = '')

#Add in average values of each skill score OVER EXXON'S OWN MODELS ONLY
rowtotal = nrow(SS_all)
SS_all[rowtotal+1,1] = 'Exxon/ExxonMobil models: AVERAGE'
SS_all[rowtotal+1,3] = paste(round(mean_TC_EM, 2),' [',round((mean_TC_EM-se_TC_EM), 2),' to ',round((mean_TC_EM+se_TC_EM), 2),']', sep = '')
SS_all[rowtotal+1,6] = paste(round(mean_TF_EM, 2),' [',round((mean_TF_EM-se_TF_EM), 2),' to ',round((mean_TF_EM+se_TF_EM), 2),']', sep = '')

SS_all = SS_all[,-c(4:5,7:8)]

dir = 'Results/Skill Score/'
fname = 'SS_all.rda'
fname2 = 'SS_all.csv'
save(SS_all, file = paste(dir,fname,sep=''))
write.csv(SS_all, file = paste(dir,fname2,sep=''))
###############################################################################################################################################################################
#10. MAIN CODE: Calculate difference (projected minus observed) in temperature change per decade
dir = 'Data/'
fname = 'projections.rda'
load(file = paste(dir,fname,sep=''))

dir = 'Data/'
fname = 'historical.rda'
load(file = paste(dir,fname,sep=''))

endyear = 2019

TC_all_diff = data.frame()
cats_proj = unique(startyears$cat)
cats_hist = unique(hist$cat)
CO2scens = unique(proj$CO2scen)
for(i in 1:length(cats_proj)){
  for(j in 1:length(CO2scens)){
    temp3 = proj[proj$cat == toString(cats_proj[i]) & proj$CO2scen == toString(CO2scens[j]),]
    if(nrow(temp3) == 0){
      next
    } else{ #else1
      
      if(nrow(temp3) == 2){ #For the occasional case where we have only 2 data points (notably 1997 Kheshgi), simply calculate TC from the straight line joining those two points. Since for i=12, startyear is not included in raw data, simply retain original data. 
        temp4 = temp3
      } else{
        temp4 = temp3[temp3$year>=startyears[i,2] & temp3$year<=endyear,]
      }
      
      df = list()
      #If temp data has no sd error bars, then simply fit to the original data points.
      if(sum(is.na(temp4$sd_high)) == nrow(temp4) & sum(is.na(temp4$sd_low)) == nrow(temp4)){
        df[[1]] = data.frame()
      } else{
        df[[1]] = data.frame()
        df[[2]] = data.frame()
        df[[3]] = data.frame()
      }
      
      sd_temp_x2 = data.frame(matrix(NA, nrow = 3, ncol = 4)) #length(cats_hist)=5 for the 5 historical records. length(df) = 1 or 3.
      colnames(sd_temp_x2) = c('x','tcdiff','varTREND','coef_sd')
      sd_temp_x = data.frame(cat = character(), CO2scen = character(), tc = double(), varAR1 = double())
      for(x in 1:length(df)){
        
        sd_temp_k = data.frame(cat = character(), CO2scen = character(), tc = double(), varAR1 = double())
        for(k in 1:length(cats_hist)){
          temp = hist[hist$cat == toString(cats_hist[k]),]
          
          if(nrow(temp4) == 2){ #For the occasional case where we have only 2 data points (notably 1997 Kheshgi), simply calculate TC from the straight line joining those two points. Therefore select corresponding historical temperature data here.
            temp2 = temp[temp$year==temp4$year[1] | temp$year==temp4$year[2],]
          } else{
            temp2 = temp[temp$year>=startyears[i,2] & temp$year<=endyear,]
          }
          temp2$year = as.numeric(temp2$year)
          
          temp5 = merge(temp2, temp4, by = 'year')
          temp5 = temp5[,c(1,2,6:11,5)]
          colnames(temp5) = c('year','dT_hist','dT_proj','sd_high','sd_low','CO2','CO2scen','cat_proj','cat_hist')
          temp5$dT_diff = temp5$dT_proj - temp5$dT_hist
          
          if(nrow(temp5) == 2){ #For the occasional case where we have only 2 data points (notably 1997 Kheshgi), simply calculate TC from the straight line joining those two points.
            TCdifftemp = (temp5$dT_diff[2] - temp5$dT_diff[1])/(temp5$year[2] - temp5$year[1])
            sd_temp_k[k,1] = toString(cats_hist[k])
            sd_temp_k[k,2] = toString(CO2scens[j])
            sd_temp_k[k,3] = TCdifftemp
            sd_temp_k[k,4] = as.numeric(NA)
          } else{ #else A
            
            #Run OLS model
            if(length(df) == 1){
              df[[1]] = temp5$dT_diff
              TCdifftemp = lm(dT_diff ~ year, data = temp5)
            }
            if(length(df) == 3){
              if(x == 1){
                df[[1]] = temp5$dT_diff
                TCdifftemp = lm(dT_diff ~ year, data = temp5)
              }
              if(x == 2){
                df[[2]] = temp5$dT_diff + temp5$sd_high
                TCdifftemp = lm(dT_diff+sd_high ~ year, data = temp5)
              }
              if(x == 3){
                df[[3]] = temp5$dT_diff - temp5$sd_low
                TCdifftemp = lm(dT_diff-sd_low ~ year, data = temp5)
              }
            }
            TC_val = TCdifftemp$coefficients[2]  
            
            
            #Run AR(1) model
            var_AR1 = arima(df[[x]], 
                            order = c(1,0,0),
                            xreg = temp5$year)
            
            #Compute confidence intervals for each regression method
            ols_ci = confint(TCdifftemp, 'year', level=0.95)[2] - TCdifftemp$coefficients[2] #Compute confidence interval span for OLS regression.
            arima_ci = confint(var_AR1, level = 0.95)[3,2] - var_AR1$coef[3]
            
            if(arima_ci > ols_ci){
              ci_upper = confint(var_AR1, level = 0.95)[3,2] - var_AR1$coef[3] + TCdifftemp$coefficients[2]
              ci_lower = confint(var_AR1, level = 0.95)[3,1] - var_AR1$coef[3] + TCdifftemp$coefficients[2]
            }
            else{
              ci_upper = confint(TCdifftemp, 'year', level=0.95)[2]
              ci_lower = confint(TCdifftemp, 'year', level=0.95)[1]
            }
            ci_val = ci_upper - TCdifftemp$coefficients[2]
            sd_temp_k[k,4] = ci_val
            
            sd_temp_k[k,1] = toString(cats_hist[k])
            sd_temp_k[k,2] = toString(CO2scens[j])
            sd_temp_k[k,3] = TCdifftemp$coefficients[2]
          }# close else A
        }#close k loop
        
        if(nrow(temp5) == 2){ #For the occasional case where we have only 2 data points (notably 1997 Kheshgi), calculate TC errors based only on coefficient uncertainty (s.d. over five historical temperature data sets)
          TCdiff = 10*mean(sd_temp_k$tc) 
          TCdiff_sd = sd(sd_temp_k$tc)
          uncertainty = sqrt(4*TCdiff_sd^2)
          TCdiff_high = TCdiff + 10*uncertainty
          TCdiff_low = TCdiff - 10*uncertainty
        } else{ #else A
          
          if(length(df) == 1){
            TCdiff = 10*mean(sd_temp_k$tc) 
            TCdiff_sd = sd(sd_temp_k$tc)
            var_AR1_avg = mean(sd_temp_k$varAR1)
            uncertainty = sqrt(4*TCdiff_sd^2 + var_AR1_avg^2)
            TCdiff_high = TCdiff + 10*uncertainty
            TCdiff_low = TCdiff - 10*uncertainty
          }
          
          if(length(df) == 3){
            sd_temp_x2$x[x] = x
            sd_temp_x2$tcdiff[x] = mean(sd_temp_k$tc) 
            sd_temp_x2$varTREND[x] = mean(sd_temp_k$varAR1)
            sd_temp_x2$coef_sd[x] = sd(sd_temp_k$tc)
            
            sd_temp_x = rbind(sd_temp_x,sd_temp_k)
            
            if(x == 1){
              TCdiff = 10*mean(sd_temp_k$tc) 
            }
            if(x == 2){
              uncertainty_high = sqrt((2*sd(sd_temp_k$tc))^2 + (mean(sd_temp_k$varAR1))^2)
              TCdiff_high = 10*(mean(sd_temp_k$tc) + uncertainty_high)
            }
            if(x == 3){
              uncertainty_low = sqrt((2*sd(sd_temp_k$tc))^2 + (mean(sd_temp_k$varAR1))^2)
              TCdiff_low = 10*(mean(sd_temp_k$tc) - uncertainty_low)
            }
          }
        }# Close else A
        
      }#close x for loop
      
      temp = data.frame(cbind(cat = cats_proj[i],CO2scen = CO2scens[j],TCdiff, TCdiff_high, TCdiff_low))
      TC_all_diff = rbind(TC_all_diff, temp)
    }#close else1
    
  }#close j loop
}#close i loop

#Create a summary spreadsheet to be printed, including 'yes/no' result of whether there is statistical overlap between observations and projections
TC_all_diff_summary = TC_all_diff
TC_all_diff_summary$overlap = 'NA'
for(i in 1:nrow(TC_all_diff_summary)){
  if(TC_all_diff_summary$TCdiff_high[i] >=0 & TC_all_diff_summary$TCdiff_low[i] <=0){
    TC_all_diff_summary$overlap[i] = 'yes'
  } else{
    TC_all_diff_summary$overlap[i] = 'no'
  }
}

dir = 'Data/'
fname = 'TC_all_diff.rda'
save(TC_all_diff, file = paste(dir,fname,sep=''))

dir = 'Results/Temp Change/'
fname2 = 'TC_all_diff.csv'
write.csv(TC_all_diff_summary, file = paste(dir,fname2,sep=''))
###############################################################################################################################################################################
#11. MAIN CODE: Calculate difference (projected minus observed) in iTCR
dir = 'Data/'
fname = 'TCR_all_proj.rda'
load(file = paste(dir,fname,sep=''))

dir = 'Data/'
fname = 'TCR_all_hist.rda'
load(file = paste(dir,fname,sep=''))

TCR_all_diff = data.frame()
cats_proj = unique(TCR_all_proj$cat)
CO2scens = unique(TCR_all_proj$CO2scen)
for(i in 1:length(cats_proj)){
  for(j in 1:length(CO2scens)){
    temp_proj = TCR_all_proj[TCR_all_proj$cat == toString(cats_proj[i]) & TCR_all_proj$CO2scen == toString(CO2scens[j]),]
    temp_hist = TCR_all_hist[TCR_all_hist$cat == toString(cats_proj[i]) & TCR_all_hist$CO2scen == toString(CO2scens[j]),]
    
    temp_proj[,c(3:ncol(temp_proj))] = sapply(temp_proj[,c(3:ncol(temp_proj))],as.numeric)
    temp_hist[,c(3:ncol(temp_hist))] = sapply(temp_hist[,c(3:ncol(temp_hist))],as.numeric)
    
    if(nrow(temp_proj) == 0){
      next
    } else{ #else1
      TCRdiff = temp_proj$TCR - temp_hist$TCR
      TCRdiff_high = TCRdiff + sqrt((temp_proj$TCR_high - temp_proj$TCR)^2 + (temp_hist$TCR_high - temp_hist$TCR)^2)
      TCRdiff_low = TCRdiff - sqrt((temp_proj$TCR - temp_proj$TCR_low)^2 + (temp_hist$TCR - temp_hist$TCR_low)^2)
      
      temp = data.frame(cbind(cat = cats_proj[i],CO2scen = CO2scens[j],TCRdiff, TCRdiff_high, TCRdiff_low))
      TCR_all_diff = rbind(TCR_all_diff, temp)
    }#close else1
    
  }#close j for loop
}#close i for loop

#Create a summary spreadsheet to be printed, including 'yes/no' result of whether there is statistical overlap between observations and projections
TCR_all_diff_summary = TCR_all_diff
TCR_all_diff_summary$overlap = 'NA'
for(i in 1:nrow(TCR_all_diff_summary)){
  if(TCR_all_diff_summary$TCRdiff_high[i] >=0 & TCR_all_diff_summary$TCRdiff_low[i] <=0){
    TCR_all_diff_summary$overlap[i] = 'yes'
  } else{
    TCR_all_diff_summary$overlap[i] = 'no'
  }
}

dir = 'Data/'
fname = 'TCR_all_diff.rda'
save(TCR_all_diff, file = paste(dir,fname,sep=''))

dir = 'Results/iTCR/'
fname2 = 'TCR_all_diff.csv'
write.csv(TCR_all_diff_summary, file = paste(dir,fname2,sep=''))
###############################################################################################################################################################################
#12. MAIN CODE: Calculate temperature change corresponding to an increase in atmospheric CO2 concentration from 450ppm to 600ppm, as indicated by Glaser (1982, fig.3) and Shaw (1984). This value is stated in the main text: "Exxon’s 1982 projection shown in fig. 1A, for example, suggests that 600 ppm of atmospheric CO2 would lead to 1.3°C more global warming than 450 ppm." 
dir = 'Data/'
fname = 'projections.rda'
load(file = paste(dir,fname,sep=''))

df = proj[proj$cat == '1982 Glaser (Figure 3); 1984 Shaw' & proj$CO2scen == 'nominal',]
interp = data.frame(approx(df$CO2, df$dT, xout = seq(400,620,1))) #Interpolate temperatures corresponding to integer values of CO2 concentration
diff = interp[interp$x == 600,2] - interp[interp$x == 450,2]
dir = 'Results/Temp Change/'
fname = 'TC_1982GlaserFig3_&_1984Shaw.csv'
write.csv(diff, file = paste(dir,fname,sep=''))
###############################################################################################################################################################################
#13. MAIN CODE: Calculate bootstrapped standard error of the median year by which Exxon predicted AGW would be detectable. This is quoted in the main text: "Ten internal reports and one peer-reviewed publication spanning 1979-85 offer quantitative estimates, with a median year of 2000 ± 5."
dir = 'Raw data/'
df = read_excel(paste(dir,'Detectable_warming_projected.xlsx',sep=''))
df2 = data.frame(df[c(1:11),c(2,5)])

f_med = function(data, indices){
  dt = data[indices,]
  c(median(dt[,2]))
}

set.seed(1234)
bootstrap = boot(df2, f_med, R=10000)

median = data.frame(median = as.numeric(bootstrap$t0), se = as.numeric(round(1.96*sd(bootstrap$t))))
dir = 'Results/Detectable Warming/'
fname = 'YearDetectable.csv'
write.csv(median, file = paste(dir,fname,sep=''))
###############################################################################################################################################################################
#14. MAIN CODE: Calculate the ratio of each rate of external forcing increase reported by ExxonMobil versus the corresponding mean estimate of observational forcings over the projection period. This is quoted in the main text: "Nonetheless, following Hausfather et al. (2020), we observe that with two exceptions (Kheshgi and Jain (2003), figs. 7c and 8c), all of the nominal CO2 scenario projections reported by ExxonMobil have rates of external forcing increase in the projection period within 1.4 times of the mean estimate of observational forcings, and thus likely exist in the regime where iTCR depends largely on radiative feedbacks and ocean heat uptake."

#Compute dF/dt for each projection
dir = 'Data/'
fname = 'projections.rda'
load(file = paste(dir,fname,sep=''))

endyear = 2019

dF_proj = data.frame()
cats = unique(proj$cat)
CO2scens = unique(proj$CO2scen)
for(i in 1:length(cats)){
  for(j in 1:length(CO2scens)){
    
    temp = proj[proj$cat == toString(cats[i]),]
    tempb = temp[temp$CO2scen == toString(CO2scens[j]),]
    
    if(nrow(tempb) == 0){
      next
    }else{ #else1
      if(cats[i] == '2001 Albritton'){ #'2001 Albritton' dataset comes with forcings already provided (instead of CO2)
        tempb$F = tempb$CO2
        tempb = tempb[,-5]
      } 
      else{ #else2
        tempb$F = 5.35*log(tempb$CO2 / tempb$CO2[1])
        
        if(cats[i] == '1977 Black; 1979 Mastracchio' | cats[i] == '1980 Shaw; 1982 Glaser (Figure9)'){ #For documents i=1 and i=2, CO2 values are available only for certain years. Therefore only compute F values starting from the first year for which we have CO2 data. 
          ind = which(!is.na(tempb$CO2))[1] #Find first non-NA value of CO2 data
          tempb$F = 5.35*log(tempb$CO2 / tempb$CO2[ind])
          
          if(cats[i] == '1980 Shaw; 1982 Glaser (Figure9)'){ #For document i=2, we need to linearly scale computed F values (computed from explicitly stated CO2 data) by temperature in order to obtain F values within the time period of interest (startyear through endyear).
            ind2 = which(!is.na(tempb$CO2))[2] #Find second non-NA value of CO2 data
            ind3 = which(tempb$year==endyear)
            for(z in seq(ind+1,ind3,1)){
              tempb$F[z] = (tempb$dT[z] / tempb$dT[ind2]) * tempb$F[ind2] #Linearly scale F by temperatures in order to get F for endyear (2019)
            }
          }
        }
      }#close else2
    }#close else1
    
    if(cats[i] == '1997 Kheshgi'){ #1997 Kheshgi contains only one data point in between startyears[i,2] and endyear (second data point falls before startyears[i,2]). Therefore retain both years to compute linear rate of change in forcing.
      temp2 = tempb
    } else{
      temp2 = tempb[tempb$year>=startyears[i,2] & tempb$year<=endyear,]
    }
    temp2 = temp2[,c('year','cat','CO2scen','F')]
    dF_proj = rbind(dF_proj,temp2)
    
  }#close j loop
}#close i loop

summary_dFdt = data.frame()
for(i in 1:length(cats)){
  for(j in 1:length(CO2scens)){
    temp3 = dF_proj[dF_proj$cat == toString(cats[i]),]
    temp3 = temp3[temp3$CO2scen == toString(CO2scens[j]),]
    
    if(nrow(temp3) == 0){
      next
    }else{ #else1
      
      #Run OLS model
      dFdt_proj_temp = lm(F ~ year, data = temp3)
      dFdt_val = dFdt_proj_temp$coefficients[2]
      
      summary = data.frame(cbind(cat = cats[i], year_start = temp3[1,c('year')], year_end = temp3[nrow(temp3),c('year')], CO2scen = CO2scens[j],dFdt = dFdt_val))
      summary_dFdt = rbind(summary_dFdt, summary)
    }#close else1
  } #close j loop
} #close i loop

#Compute mean of dF/dt for historical observations
dir = 'Data/'
fname = 'rf_new.rda'
load(file = paste(dir,fname,sep=''))

summary_dFdt_hist = data.frame()
rfs = unique(df$rf_member)
for(i in 1:nrow(summary_dFdt)){
  
  dFdt_hist = data.frame()
  for(k in 1:length(rfs)){
    temp = rf_new[rf_new$rf_member == rfs[k],]
    temp = temp[temp$year>=summary_dFdt[i,c('year_start')] & temp$year<=summary_dFdt[i,c('year_end')],]
    
    #Run OLS model
    dFdt_hist_temp = lm(rf ~ year, data = temp)
    dFdt_val = dFdt_hist_temp$coefficients[2]
    dFdt_hist_temp2 = data.frame(cbind(cat = summary_dFdt[i,c('cat')], CO2scen = summary_dFdt[i,c('CO2scen')], rf_member = rfs[k], dFdt = dFdt_val))
    
    dFdt_hist = rbind(dFdt_hist, dFdt_hist_temp2)
    
  }#close k loop
  
  dFdt_hist_mean = mean(as.numeric(dFdt_hist$dFdt))
  summary_dFdt_hist_temp = data.frame(cbind(cat = summary_dFdt[i,c('cat')], CO2scen = summary_dFdt[i,c('CO2scen')], dFdt_hist_mean = dFdt_hist_mean))
  summary_dFdt_hist = rbind(summary_dFdt_hist, summary_dFdt_hist_temp)
  
}#close i loop

#Calculate ratio of dF/dt for projections versus historical observations
summary_dFdt_proj_vs_hist = merge(summary_dFdt, summary_dFdt_hist, by = c("cat","CO2scen"), all.x = TRUE)
summary_dFdt_proj_vs_hist$ratio = as.numeric(summary_dFdt_proj_vs_hist$dFdt_hist_mean)/as.numeric(summary_dFdt_proj_vs_hist$dFdt)

dir = 'Results/dFdt_proj_vs_hist/'
fname = 'dFdt_proj_vs_hist.csv'
write.csv(summary_dFdt_proj_vs_hist, file = paste(dir,fname,sep=''))
###############################################################################################################################################################################